
import numpy as np
import random
import matplotlib.pyplot as plt
from collections import deque
import pickle

def moving_average(data, window_size=10):
    """Return the simple moving average of *data* over full windows only.

    Each output element is the arithmetic mean of ``window_size`` consecutive
    input values, so the result has ``len(data) - window_size + 1`` elements.

    Args:
        data: One-dimensional sequence or array of numbers.
        window_size: Number of consecutive samples averaged per output value.

    Returns:
        A NumPy array of window means. It is empty when *data* holds fewer
        than ``window_size`` samples, because no full window fits.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    samples = np.asarray(data)
    # np.convolve(mode='valid') swaps its operands when the kernel is longer
    # than the signal and would then return values that are not window means,
    # so the "no full window" case is answered explicitly.
    if samples.size < window_size:
        return np.empty(0, dtype=float)

    kernel = np.ones(window_size) / window_size
    return np.convolve(samples, kernel, mode='valid')


# In[41]:


# Load the saved experiment results used by the figures below.
# NOTE: pickle can execute arbitrary code while loading, so these files must
# come from a trusted source.
def _read_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Rewards histogrammed in Figure 2 (a).
reward_cvar = _read_pickle('PCVaR-Q_reward_cvar.pickle')

# CVaR histories plotted in Figure 2 (b), one file per method.
cvar_hists_baseline = _read_pickle('CVaR-Q_cvar.pickle')
cvar_hists_Qcvar = _read_pickle('PCVaR-Q_cvar.pickle')


# In[53]:


# Figure 2 (a): density histograms of the RN reference sample and of the
# loaded PCVaR-Q rewards, each with its lower-tail CVaR drawn as a vertical line.
q = 0.1  # tail level; the VaR below is the (q * 100)-th percentile

# Reference sample: 100000 normal draws with mean 5 and std sqrt(3).
# NOTE(review): the generator is not seeded, so this sample (and therefore the
# RN histogram and its CVaR line) differs from run to run.
reward_RN = np.random.normal(5, 3**(1/2), 100000)

# CVaR here is the mean of all samples at or below the q-level percentile.
var_RN = np.percentile(reward_RN, q * 100)
rn_tail_mask = reward_RN <= var_RN
cvar_RN = np.mean(reward_RN[rn_tail_mask])

# assumes reward_cvar is a NumPy array (boolean-mask indexing) — TODO confirm
var_cvar = np.percentile(reward_cvar, q * 100)
pcvar_tail_mask = reward_cvar <= var_cvar
cvar_cvar = np.mean(reward_cvar[pcvar_tail_mask])

fig_a, ax_a = plt.subplots(figsize=(12, 6))

# Both histograms share binning, outline and normalisation.
hist_style = dict(bins=30, edgecolor='black', density=True)
ax_a.hist(reward_RN, alpha=0.4, label='RN', color='royalblue', **hist_style)
ax_a.hist(reward_cvar, alpha=0.6, label='PCVaR-Q', color='darkorange', **hist_style)

marker_style = dict(linestyle='--', linewidth=2)
ax_a.axvline(cvar_RN, color='blue', label='CVaR of RN ', **marker_style)
ax_a.axvline(cvar_cvar, color='red', label='CVaR of PCVaR-Q', **marker_style)

ax_a.set_xlabel('Reward')
ax_a.set_ylabel('Density')
ax_a.legend(fontsize=16)
ax_a.grid(True, linestyle='--', alpha=0.5)

fig_a.tight_layout()
fig_a.savefig("Figure 2 (a).png", dpi=300)
plt.close(fig_a)

# Figure 2 (b): estimated CVaR against training episode for PCVaR-Q and CVaR-Q,
# showing individual histories, their mean, and a +/- one-std band.
# x positions: 500, 1000, ..., 5500 (11 points).
# NOTE(review): every plotted history must therefore hold 11 values.
eval_points = np.arange(500, 6000, 500)

# Statistics along axis 0 (presumably the independent runs — confirm).
mean_cvar_Qcvar = np.mean(cvar_hists_Qcvar, axis=0)
std_cvar_Qcvar = np.std(cvar_hists_Qcvar, axis=0)
mean_cvar_baseline = np.mean(cvar_hists_baseline, axis=0)
std_cvar_baseline = np.std(cvar_hists_baseline, axis=0)

fig_b, ax_b = plt.subplots(figsize=(12, 6))

# Faint per-history traces, alternating PCVaR-Q and CVaR-Q.
# NOTE(review): only the first 10 histories are drawn, whereas the mean/std
# above use every entry along axis 0.
per_method_colours = (
    (cvar_hists_Qcvar, 'darkorange'),
    (cvar_hists_baseline, 'royalblue'),
)
for run_idx in range(10):
    for histories, colour in per_method_colours:
        ax_b.plot(eval_points, histories[run_idx],
                  linestyle='--', color=colour, alpha=0.3)

# Mean curve plus shaded one-standard-deviation band for each method.
summaries = (
    ('PCVaR-Q', mean_cvar_Qcvar, std_cvar_Qcvar, 'darkorange', 'o'),
    ('CVaR-Q', mean_cvar_baseline, std_cvar_baseline, 'royalblue', 's'),
)
for label, centre, spread, colour, marker in summaries:
    ax_b.plot(eval_points, centre,
              color=colour, marker=marker, linewidth=2.5, label=label)
    ax_b.fill_between(eval_points, centre - spread, centre + spread,
                      color=colour, alpha=0.2)

# Horizontal reference at 2.5, labelled 'Opt' (presumably the optimal CVaR).
ax_b.axhline(y=2.5, color='red', linestyle='--', linewidth=2, label='Opt')

ax_b.set_ylabel('Estimated CVaR', fontsize=12)
ax_b.set_xlabel('Training Episode', fontsize=12)
ax_b.set_xticks(eval_points)
ax_b.legend(fontsize=16, loc='lower right')
ax_b.grid(True, linestyle='--', alpha=0.5)

fig_b.tight_layout()
fig_b.savefig("Figure 2 (b).png", dpi=300)
plt.close(fig_b)
